Introduction

This vignette provides an example for creating pvalues objects for the volcano3D pipeline using DESeq2 and limma.

This example consists of a case study from the PEAC rheumatoid arthritis project (Pathobiology of Early Arthritis Cohort). The methodology has been published in ‘Lewis, Myles J., et al. “Molecular portraits of early rheumatoid arthritis identify clinical and treatment response phenotypes.” Cell reports 28.9 (2019): 2455-2470.’ (DOI: 10.1016/j.celrep.2019.07.091) with an interactive web tool available at https://peac.hpc.qmul.ac.uk.

Getting Started

Install from CRAN

Not yet publicly available:

install.packages("volcano3D")

Install from Github

library(devtools)
install_github("KatrionaGoldmann/volcano3D")

library(volcano3D)

The sample data used in this vignette can be loaded from the volcano3Ddata package. This is only possible after volcano3D is imported.

install.packages("volcano3Ddata")

Setup

To create a pvalues data frame we require:

  1. the txi or expression data with columns representing different samples and rows representing different variables
  2. The sample data which contains information for each sample in rows.

These can both be loaded from the syn_txi dataset in the volcano3Ddata package.

library(volcano3Ddata)
data("syn_txi")

Check the alignment and make sure there are only three possible contrasts (in this case three unique Pathotypes)

if(! identical(syn_metadata$ID, colnames(syn_txi$abundance))){
    stop("mis-aligned")
}

if(length(levels(syn_metadata$Pathotype)) != 3){
    stop("The number of unique pathotypes must qual 3")
}

groups <- levels(syn_metadata$Pathotype)
contrasts <- c(paste(groups[1], groups[2], sep="-"), 
               paste(groups[2], groups[3], sep="-"), 
               paste(groups[3], groups[1], sep="-"))

Our comparisons of interest are set up as:

  • Fibroid-Lymphoid
  • Lymphoid-Myeloid
  • Myeloid-Fibroid

In this vignette we will outline two methods to determine the differential gene expression:

  1. DESeq
  2. Limma-voom

DESeq method

Use the DESeq package to calculate the differential expression between groups. The vignette for this package can be found here

library(DESeq2)
dds = DESeqDataSetFromTximport(txi = syn_txi, 
                               colData = syn_metadata, 
                               design = ~Pathotype+Batch+Gender)

dds_DE <- DESeq(dds)
dds_LRT <- DESeq(dds, test = "LRT", reduced = ~Batch+Gender, parallel = TRUE) 

Output the results into one dataframe. First get the results for each contrast:

Pvals_DESeq_DE <- lapply(contrasts, function(x){
    vars <- unlist(strsplit(x, split="-"))
    out <- results(dds_DE, contrast=c("Pathotype", vars))
    out <- out[match(rownames(syn_txi$counts), rownames(out)), ]
    out <- out[,c("pvalue", "padj", "log2FoldChange")]
})

Then get the multi-group test results. In this case we use the Likelihood ratio test - this compares the model using ~Pathotype+Batch+Gender to the reduced model using ~Batch+Gender.

LRT <- results(dds_LRT, parallel=TRUE)[, c("pvalue", "padj", "log2FoldChange")]
LRT <- LRT[match(rownames(syn_txi$counts), rownames(LRT)), ]

The results can then be combined into one pvalues data frame:

Pvals_DESeq <- cbind(Pvals_DESeq_DE[[1]], 
                     Pvals_DESeq_DE[[2]], 
                     Pvals_DESeq_DE[[3]], 
                     LRT)
colnames(Pvals_DESeq) <- c(paste(rep(contrasts, each=3), 
                                 rep(colnames(Pvals_DESeq_DE[[1]]), 3)), 
                           paste("LRT", colnames(LRT)))

Pvals_DESeq <- data.frame(Pvals_DESeq[complete.cases(Pvals_DESeq), ], 
                          check.names = FALSE)

Now we can inspect:

head(Pvals_DESeq) %>%
    kable() %>%
    kable_styling(font_size=8.7)
Fibroid-Lymphoid pvalue Fibroid-Lymphoid padj Fibroid-Lymphoid log2FoldChange Lymphoid-Myeloid pvalue Lymphoid-Myeloid padj Lymphoid-Myeloid log2FoldChange Myeloid-Fibroid pvalue Myeloid-Fibroid padj Myeloid-Fibroid log2FoldChange LRT pvalue LRT padj LRT log2FoldChange
A2M 0.7775361 0.8851935 -0.0478326 0.1563045 0.3384292 -0.2202768 0.1694654 0.4916679 0.2681094 0.2739571 0.4353436 0.0527816
A2ML1 0.0002106 0.0011118 1.6854024 0.0000030 0.0000793 -1.9299687 0.6348844 0.8441036 0.2445664 0.0000022 0.0000202 0.3983423
A4GALT 0.0000000 0.0000000 1.0248506 0.0000192 0.0003539 -0.5646665 0.0055222 0.1009600 -0.4601841 0.0000000 0.0000000 0.2121462
A4GNT 0.0238302 0.0610621 -1.1527859 0.6691773 0.8180681 -0.1857435 0.0197330 0.1893057 1.3385294 0.0732984 0.1583021 -0.1939520
AAAS 0.8574281 0.9383721 -0.0245296 0.6608837 0.8124146 0.0548697 0.8469982 0.9435306 -0.0303402 0.9075226 0.9711813 0.3523114
AACS 0.2815302 0.4322015 0.1729592 0.0630182 0.1821943 -0.2732040 0.5874802 0.8169632 0.1002448 0.1472059 0.2732938 -0.2881377

Limma-voom method

Use the Limma package to calculate the differential expression between groups. The limma vignette can be found here and limma-voom vignette here.

library(limma)
library(edgeR)

syn_tpm = syn_txi$counts

# build the design matrix and contrast matrix
design <- model.matrix(~ 0 + syn_metadata$Pathotype + syn_metadata$Batch)  
colnames(design) <- gsub("syn_metadata\\$Pathotype", "", colnames(design))
colnames(design) <- gsub("syn_metadata\\$Batch", "", colnames(design))

contrast.matrix <- makeContrasts(
    paste0(colnames(design)[1] , "-", colnames(design)[2]),
    paste0(colnames(design)[2] , "-", colnames(design)[3]),
    paste0(colnames(design)[3] , "-", colnames(design)[1]),
    levels = design)

# filter data and normalise
dge <- DGEList(counts = syn_tpm)     
keep <- filterByExpr(dge, design)
dge <- dge[keep, , keep.lib.sizes=FALSE]
dge <- calcNormFactors(dge)

# voom
v <- voom(dge, design, plot=FALSE)
fit1 <- lmFit(v, design)
fit <- contrasts.fit(fit1, contrast.matrix) 
fit <- eBayes(fit)

Now we can get the model fit results.

contrasts <- colnames(coefficients(fit))

Pvals_limma_DE <- lapply(contrasts, function(x){
    id = which(colnames(coefficients(fit)) == x)
    
    out <- topTable(fit, adjust.method = "fdr", coef= id, number=Inf, 
                    sort.by="none")
    rownames(out) <- make.names(out$ID, unique=T)
    out <- out[,c("P.Value", "adj.P.Val", "logFC")]
    colnames(out) <- c("pvalue", "padj", "log2FoldChange")
    out
})

Pvals_overall <- topTable(fit, coef=1:3, adjust.method="fdr", number=Inf, 
                          sort.by="none")[,c("P.Value", "adj.P.Val")]
colnames(Pvals_overall) <- c("pvalue", "padj")

The results can then be combined into one pvalues data frame:

Pvals_limma <- cbind(Pvals_limma_DE[[1]], 
                     Pvals_limma_DE[[2]], 
                     Pvals_limma_DE[[3]], 
                     Pvals_overall)

colnames(Pvals_limma) <- c(paste(rep(contrasts, each=3), 
                                 rep(colnames(Pvals_limma_DE[[1]]), 3)), 
                           paste("Overall", colnames(Pvals_overall)))

Now we can inspect:

head(Pvals_limma) %>%
    kable() %>%
    kable_styling(font_size=8.7)
Fibroid-Lymphoid pvalue Fibroid-Lymphoid padj Fibroid-Lymphoid log2FoldChange Lymphoid-Myeloid pvalue Lymphoid-Myeloid padj Lymphoid-Myeloid log2FoldChange Myeloid-Fibroid pvalue Myeloid-Fibroid padj Myeloid-Fibroid log2FoldChange Overall pvalue Overall padj
A2M 0.5170499 0.6366674 -0.1121413 0.2279382 0.4481883 -0.1979652 0.1303462 0.4559284 0.3101065 0.2869610 0.4293019
A2ML1 0.0144789 0.0390096 1.2145059 0.0602010 0.1877927 -0.8634615 0.5167596 0.7917087 -0.3510444 0.0239250 0.0629112
A4GALT 0.0000000 0.0000000 1.0208963 0.0000197 0.0006240 -0.5925725 0.0114855 0.1786983 -0.4283238 0.0000000 0.0000000
A4GNT 0.0967374 0.1782220 -0.9478684 0.8495848 0.9298853 0.0924637 0.1800195 0.5235976 0.8554047 0.2318880 0.3676664
AAAS 0.4801279 0.6027211 0.0810740 0.9907265 0.9954684 -0.0012020 0.5395954 0.8042311 -0.0798720 0.7611265 0.8432504
AACS 0.2047727 0.3179074 0.2305878 0.1038665 0.2708610 -0.2669746 0.8557094 0.9511441 0.0363868 0.1867949 0.3139590

Using the Pvalues Objects

Both the deseq and limma-voom pvalues objects can then be used to create various plots with the volcano3D package as outlined in the vignette.

First lets load in the expression data.

library(volcano3D)
library(volcano3Ddata)
data("syn_data")

With DESeq

# Curate the expression data
syn_exp = syn_rld 
rownames(syn_exp) = make.names(rownames(syn_exp), unique = T)

# Align the expression and pvalue data
syn_exp = syn_exp[intersect(rownames(Pvals_DESeq), rownames(syn_exp)), ]
Pvals_DESeq = Pvals_DESeq[intersect(rownames(Pvals_DESeq), rownames(syn_exp)), ]

syn_polar_deseq <- polar_coords(sampledata = syn_metadata,
                          contrast = "Pathotype",
                          pvalues = Pvals_DESeq,
                          expression = syn_exp,
                          p_col_suffix = "pvalue",
                          padj_col_suffix = "padj",
                          fc_col_suffix = "log2FoldChange",
                          multi_group_prefix = "LRT",
                          non_sig_name = "Not Significant",
                          significance_cutoff = 0.01,
                          label_column = NULL,
                          fc_cutoff = 0.1)
radial_plotly(polar = syn_polar_deseq, 
              colours=c("green3", "cyan", "gold2", "blue", "purple", "red"),
              label_rows = c("SLAMF6", "PARP16", "ITM2C"))
p <- volcano3D(syn_polar_deseq,
               label_rows = c("SLAMF6", "PARP16", "ITM2C"),
               label_size = 10,
               xy_aspectratio = 1,
               z_aspectratio = 0.9)

p %>% plotly::layout(legend = list(x = 100, y = 0.5))

With Limma

The only difference here is we change the multi_group_prefix to “Overall” and remove the fold change paramtere.

# Curate the expression data
syn_exp = syn_rld 
rownames(syn_exp) = make.names(rownames(syn_exp), unique = T)

# Align the expression and pvalue data
syn_exp = syn_exp[intersect(rownames(Pvals_limma), rownames(syn_exp)), ]
Pvals_limma = Pvals_limma[intersect(rownames(Pvals_limma), rownames(syn_exp)), ]

syn_polar_limma <- polar_coords(sampledata = syn_metadata,
                          contrast = "Pathotype",
                          pvalues = Pvals_limma,
                          expression = syn_exp,
                          p_col_suffix = "pvalue",
                          padj_col_suffix = "padj",
                          fc_col_suffix = NULL,
                          multi_group_prefix = "Overall",
                          non_sig_name = "Not Significant",
                          significance_cutoff = 0.01,
                          label_column = NULL,
                          fc_cutoff = 0.1)
radial_plotly(polar = syn_polar_limma, 
              olours=c("green3", "cyan", "gold2", "blue", "purple", "red"),
              label_rows = c("SLAMF6", "PARP16", "ITM2C"))
p <- volcano3D(syn_polar_limma,
               label_rows = c("SLAMF6", "PARP16", "ITM2C"),
               label_size = 10,
               xy_aspectratio = 1,
               z_aspectratio = 0.9)

p %>% plotly::layout(legend = list(x = 100, y = 0.5))